import subprocess
from pathlib import Path
import duckdb
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "notebook"
ROOT = Path.cwd()
DBT_DIR = ROOT.parent / "dbt"
DUCKDB_PATH = ROOT.parent / "ispra.duckdb"
# Grading: rank 1-20, A (top 4), B (5-8), C (9-12), D (13-16), F (17-20)
GRADE_COLORS = {"A": "#6BCB77", "B": "#A8D672", "C": "#D4A84B", "D": "#E07A5F", "F": "#C45A3C"}
def grade_from_rank(rank):
if rank <= 4: return "A"
if rank <= 8: return "B"
if rank <= 12: return "C"
if rank <= 16: return "D"
return "F"
def add_grades(df, value_col, ascending=True):
"""Rank regions and assign grades. ascending=True means lower value = better."""
df = df.sort_values(value_col, ascending=ascending).reset_index(drop=True)
df["rank"] = range(1, len(df) + 1)
df["grade"] = df["rank"].apply(grade_from_rank)
df["color"] = df["grade"].map(GRADE_COLORS)
return df
con = duckdb.connect(DUCKDB_PATH, read_only=True)
# Waste: 2024 snapshot + full time series
waste_2024 = con.sql("""
SELECT regione, recycling_rate_pct, waste_per_capita_kg
FROM fct_waste_by_region WHERE anno = 2024
""").fetchdf()
waste_ts = con.sql("""
SELECT anno, regione, recycling_rate_pct
FROM fct_waste_by_region ORDER BY anno, regione
""").fetchdf()
# Land: aggregate comuni to regions
land = con.sql("""
SELECT regione,
ROUND(AVG(consumed_soil_2024_pct), 2) AS avg_consumed_pct,
ROUND(SUM(total_net_increment_2006_2024_ha), 1) AS total_growth_ha,
ROUND(SUM(net_increment_last_3y_ha), 1) AS recent_growth_ha
FROM fct_land_consumption_ranking
GROUP BY regione
""").fetchdf()
# Air: pivot NO2 and PM10 into columns, compute combined index
air = con.sql("""
SELECT regione,
MAX(CASE WHEN pollutant = 'NO2' THEN avg_mean END) AS no2,
MAX(CASE WHEN pollutant = 'PM10' THEN avg_mean END) AS pm10,
MAX(CASE WHEN pollutant = 'PM2.5' THEN avg_mean END) AS pm25,
ROUND((MAX(CASE WHEN pollutant = 'NO2' THEN avg_mean END)
+ MAX(CASE WHEN pollutant = 'PM10' THEN avg_mean END)) / 2, 1) AS pollution_index
FROM fct_air_quality_by_region
WHERE anno = 2022
GROUP BY regione
""").fetchdf()
# Normalize Friuli name
air["regione"] = air["regione"].str.replace("Friuli Venezia Giulia", "Friuli-Venezia Giulia")
con.close()
print(f"Waste: {len(waste_2024)} regions, Land: {len(land)} regions, Air: {len(air)} regions")
Waste: 20 regions, Land: 20 regions, Air: 20 regions
1. Raccolta Differenziata (Recycling Rate) — 2024¶
Italy's national target is 65%. How do the 20 regions compare?
w = add_grades(waste_2024, "recycling_rate_pct", ascending=False) # higher = better
fig = px.bar(
w, x="recycling_rate_pct", y="regione", orientation="h",
color="grade", color_discrete_map=GRADE_COLORS,
category_orders={"regione": w["regione"].tolist()},
text="recycling_rate_pct",
labels={"recycling_rate_pct": "Recycling Rate (%)", "regione": ""},
title="Recycling Rate by Region (2024)",
)
fig.add_vline(x=65, line_dash="dash", line_color="white", opacity=0.4,
annotation_text="65% national target", annotation_position="top")
fig.update_traces(texttemplate="%{text:.1f}%", textposition="outside", textfont_size=10)
fig.update_layout(
template="plotly_dark", height=600, showlegend=True,
legend_title_text="Grade", yaxis=dict(autorange="reversed"),
margin=dict(l=160),
)
fig.show()
Recycling over time: convergence or divergence?¶
15 years of data (2010–2024) show whether regions are catching up or falling further behind.
# Highlight top 3, bottom 3, and national average
top3 = w.head(3)["regione"].tolist()
bot3 = w.tail(3)["regione"].tolist()
highlight = top3 + bot3
ts_highlight = waste_ts[waste_ts["regione"].isin(highlight)]
ts_avg = waste_ts.groupby("anno")["recycling_rate_pct"].mean().reset_index()
ts_avg["regione"] = "Italy (avg)"
fig = px.line(
ts_highlight, x="anno", y="recycling_rate_pct", color="regione",
labels={"anno": "", "recycling_rate_pct": "Recycling Rate (%)", "regione": ""},
title="Recycling Rate 2010–2024: Best vs Worst Regions",
)
fig.add_scatter(x=ts_avg["anno"], y=ts_avg["recycling_rate_pct"],
mode="lines", name="Italy (avg)",
line=dict(color="white", dash="dot", width=2))
fig.add_hline(y=65, line_dash="dash", line_color="rgba(255,255,255,0.3)",
annotation_text="65% target")
fig.update_layout(template="plotly_dark", height=450)
fig.show()
More waste ≠ better recycling¶
Do regions that generate more waste per capita also recycle more? Or is high waste generation a sign of consumption excess?
fig = px.scatter(
w, x="waste_per_capita_kg", y="recycling_rate_pct",
color="grade", color_discrete_map=GRADE_COLORS,
text="regione", size_max=12,
labels={"waste_per_capita_kg": "Waste per Capita (kg)", "recycling_rate_pct": "Recycling Rate (%)"},
title="Waste Generation vs Recycling Rate (2024)",
)
fig.update_traces(textposition="top center", textfont_size=9)
fig.add_hline(y=65, line_dash="dash", line_color="rgba(255,255,255,0.3)")
fig.update_layout(template="plotly_dark", height=500, showlegend=False)
fig.show()
2. Consumo di Suolo (Land Consumption) — 2024¶
How much of each region's territory has been paved over? And how fast is it still growing?
l = add_grades(land, "avg_consumed_pct", ascending=True) # lower = better
fig = px.bar(
l, x="avg_consumed_pct", y="regione", orientation="h",
color="grade", color_discrete_map=GRADE_COLORS,
category_orders={"regione": l["regione"].tolist()},
text="avg_consumed_pct",
labels={"avg_consumed_pct": "Average Consumed Soil (%)", "regione": ""},
title="Land Consumption by Region (2024)",
)
fig.update_traces(texttemplate="%{text:.1f}%", textposition="outside", textfont_size=10)
fig.update_layout(
template="plotly_dark", height=600, showlegend=True,
legend_title_text="Grade", yaxis=dict(autorange="reversed"),
margin=dict(l=160),
)
fig.show()
Where is growth happening fastest?¶
Total hectares consumed since 2006 vs recent growth (last 3 years). Are the biggest consumers still accelerating?
fig = px.scatter(
l, x="total_growth_ha", y="recent_growth_ha",
color="grade", color_discrete_map=GRADE_COLORS,
text="regione",
labels={"total_growth_ha": "Total Growth Since 2006 (ha)",
"recent_growth_ha": "Growth Last 3 Years (ha)"},
title="Land Consumption: Historical vs Recent Growth",
)
fig.update_traces(textposition="top center", textfont_size=9)
fig.update_layout(template="plotly_dark", height=500, showlegend=False)
fig.show()
3. Qualità dell'Aria (Air Quality) — 2022¶
NO₂ and PM10 concentrations across regions. The Po Valley's geography traps pollutants — does the data confirm it?
a = add_grades(air, "pollution_index", ascending=True) # lower = better
fig = px.bar(
a, x="pollution_index", y="regione", orientation="h",
color="grade", color_discrete_map=GRADE_COLORS,
category_orders={"regione": a["regione"].tolist()},
text="pollution_index",
labels={"pollution_index": "Pollution Index (avg NO₂ + PM10 / 2)", "regione": ""},
title="Air Quality by Region — Combined Pollution Index (2022)",
)
fig.update_traces(texttemplate="%{text:.1f}", textposition="outside", textfont_size=10)
fig.update_layout(
template="plotly_dark", height=600, showlegend=True,
legend_title_text="Grade", yaxis=dict(autorange="reversed"),
margin=dict(l=160),
)
fig.show()
NO₂ vs PM10: different pollutants, different stories¶
NO₂ comes mainly from traffic. PM10 from heating, agriculture, and industry. Which regions suffer from which?
fig = px.scatter(
a, x="no2", y="pm10", color="grade", color_discrete_map=GRADE_COLORS,
text="regione",
labels={"no2": "NO₂ mean (µg/m³)", "pm10": "PM10 mean (µg/m³)"},
title="NO₂ vs PM10 by Region (2022)",
)
fig.update_traces(textposition="top center", textfont_size=9)
# WHO guidelines
fig.add_hline(y=15, line_dash="dot", line_color="rgba(107,203,119,0.4)",
annotation_text="WHO PM10 guideline (15)")
fig.add_vline(x=10, line_dash="dot", line_color="rgba(107,203,119,0.4)",
annotation_text="WHO NO₂ guideline (10)")
fig.update_layout(template="plotly_dark", height=500, showlegend=False)
fig.show()
4. Pagella Finale — Overall Environmental Report Card¶
Each region ranked 1–20 per metric. Grades: A (top 4), B (5–8), C (9–12), D (13–16), F (17–20). Overall grade from average rank.
# Build report card from the three graded DataFrames
report = w[["regione", "rank", "grade"]].rename(columns={"rank": "waste_rank", "grade": "waste_grade"})
report = report.merge(
l[["regione", "rank", "grade"]].rename(columns={"rank": "land_rank", "grade": "land_grade"}),
on="regione")
report = report.merge(
a[["regione", "rank", "grade"]].rename(columns={"rank": "air_rank", "grade": "air_grade"}),
on="regione")
report["avg_rank"] = (report["waste_rank"] + report["land_rank"] + report["air_rank"]) / 3
report = report.sort_values("avg_rank").reset_index(drop=True)
report["overall_rank"] = range(1, 21)
report["overall_grade"] = report["overall_rank"].apply(grade_from_rank)
report[["overall_rank", "regione", "waste_grade", "land_grade", "air_grade", "overall_grade", "avg_rank"]]
| overall_rank | regione | waste_grade | land_grade | air_grade | overall_grade | avg_rank | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | Sardegna | A | A | A | A | 3.000000 |
| 1 | 2 | Basilicata | C | A | A | A | 4.666667 |
| 2 | 3 | Valle d'Aosta | B | A | B | A | 5.666667 |
| 3 | 4 | Umbria | C | B | A | A | 6.333333 |
| 4 | 5 | Trentino-Alto Adige | A | B | D | B | 7.333333 |
| 5 | 6 | Abruzzo | D | B | B | B | 8.666667 |
| 6 | 7 | Calabria | F | B | A | B | 9.333333 |
| 7 | 8 | Marche | B | C | C | B | 10.000000 |
| 8 | 9 | Emilia-Romagna | A | D | D | C | 10.333333 |
| 9 | 10 | Friuli-Venezia Giulia | B | D | C | C | 10.666667 |
| 10 | 11 | Molise | D | A | F | C | 11.000000 |
| 11 | 12 | Toscana | C | C | C | C | 11.333333 |
| 12 | 13 | Piemonte | C | C | D | D | 12.000000 |
| 13 | 14 | Liguria | D | D | C | D | 12.666667 |
| 14 | 15 | Veneto | A | F | F | D | 12.666667 |
| 15 | 16 | Puglia | D | F | B | D | 13.000000 |
| 16 | 17 | Sicilia | F | D | B | F | 14.000000 |
| 17 | 18 | Lazio | F | C | D | F | 14.000000 |
| 18 | 19 | Lombardia | B | F | F | F | 15.000000 |
| 19 | 20 | Campania | F | F | F | F | 18.333333 |
# Heatmap of grades across all metrics
grade_to_num = {"A": 5, "B": 4, "C": 3, "D": 2, "F": 1}
heatmap_data = report.set_index("regione")[["waste_grade", "land_grade", "air_grade", "overall_grade"]]
heatmap_num = heatmap_data.replace(grade_to_num)
fig = go.Figure(data=go.Heatmap(
z=heatmap_num.values,
x=["Rifiuti", "Suolo", "Aria", "Overall"],
y=heatmap_data.index.tolist(),
text=heatmap_data.values,
texttemplate="%{text}",
textfont={"size": 13, "color": "#1A1714"},
colorscale=[[0, "#C45A3C"], [0.25, "#E07A5F"], [0.5, "#D4A84B"], [0.75, "#A8D672"], [1, "#6BCB77"]],
showscale=False,
hovertemplate="Region: %{y}<br>Metric: %{x}<br>Grade: %{text}<extra></extra>",
))
fig.update_layout(
template="plotly_dark", height=700,
title="Environmental Report Card — All Regions",
yaxis=dict(autorange="reversed"),
margin=dict(l=180),
)
fig.show()